# Source the project configuration, muffling both warnings and messages
# emitted while it runs.
suppressWarnings(suppressMessages(source("config.R")))

# Echo the output root directory (expected to be defined by config.R).
FD_OUT
'/data/reddylab/Kuei/out'
# Path to the annotatePeaks table named "tss_count_raw.txt" under FD_OUT.
fname = "tss_count_raw.txt"
fdiry = file.path(FD_OUT, "Dex_PROcap", "run_homer", "out_annotate", "annoTSS")
fpath = file.path(fdiry, fname)

# Read the tab-separated table and preview its first three rows.
dat_raw = read_tsv(file = fpath)
dat_raw %>% head(3)
Parsed with column specification:
cols(
.default = col_character(),
Start = col_double(),
End = col_double(),
`Peak Score` = col_double(),
`Distance to TSS` = col_double(),
`Entrez ID` = col_double(),
`/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t00/ Tag Count in given bp (9384553.0 Total, normalization factor = 1, effective total = 10000000)` = col_double(),
`/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t15/ Tag Count in given bp (9252351.5 Total, normalization factor = 1, effective total = 10000000)` = col_double(),
`/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t60/ Tag Count in given bp (13741782.5 Total, normalization factor = 1, effective total = 10000000)` = col_double()
)
See spec(...) for full column specifications.
| PeakID (cmd=annotatePeaks.pl /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_merge/mergeTSS/tss_merge.txt hg38 -strand + -fragLength 1 -raw -d /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t00/ /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t15/ /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t60/) | Chr | Start | End | Strand | Peak Score | Focus Ratio/Region Size | Annotation | Detailed Annotation | Distance to TSS | ... | Nearest Unigene | Nearest Refseq | Nearest Ensembl | Gene Name | Gene Alias | Gene Description | Gene Type | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t00/ Tag Count in given bp (9384553.0 Total, normalization factor = 1, effective total = 10000000) | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t15/ Tag Count in given bp (9252351.5 Total, normalization factor = 1, effective total = 10000000) | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t60/ Tag Count in given bp (13741782.5 Total, normalization factor = 1, effective total = 10000000) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Merged-chr14-49862550-3 | chr14 | 49862475 | 49862625 | + | 5036.000 | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap00/out.tss.txt|/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap15/out.tss.txt|/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap60/out.tss.txt | TTS (NR_027260) | TTS (NR_027260) | 299 | ... | NA | NR_027260 | ENSG00000274012 | RN7SL2 | 7L1C|7L30.1|7SL1c|RNSRP2 | RNA component of signal recognition particle 7SL2 | scRNA | 6259 | 4545.5 | 4832.5 |
| Merged-chr14-49862849-3 | chr14 | 49862774 | 49862924 | - | 4960.967 | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap00/out.tss.txt|/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap15/out.tss.txt|/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap60/out.tss.txt | promoter-TSS (NR_027260) | promoter-TSS (NR_027260) | 0 | ... | NA | NR_027260 | ENSG00000274012 | RN7SL2 | 7L1C|7L30.1|7SL1c|RNSRP2 | RNA component of signal recognition particle 7SL2 | scRNA | 4608 | 3442.5 | 4541.0 |
| Merged-chr17-19188010-3 | chr17 | 19187935 | 19188085 | + | 3358.767 | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap00/out.tss.txt|/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap15/out.tss.txt|/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap60/out.tss.txt | promoter-TSS (NR_006880) | promoter-TSS (NR_006880) | -6 | ... | NA | NR_006880 | ENSG00000263934 | SNORD3A | RNU3|U3 | small nucleolar RNA, C/D box 3A | snoRNA | 4068 | 3830.5 | 2131.5 |
# Path to the annotatePeaks table named "tss_count_rlg.txt" under FD_OUT.
fname = "tss_count_rlg.txt"
fdiry = file.path(FD_OUT, "Dex_PROcap", "run_homer", "out_annotate", "annoTSS")
fpath = file.path(fdiry, fname)

# Read the tab-separated table and preview its first three rows.
dat_rlg = read_tsv(file = fpath)
dat_rlg %>% head(3)
Parsed with column specification:
cols(
.default = col_character(),
Start = col_double(),
End = col_double(),
`Peak Score` = col_double(),
`Distance to TSS` = col_double(),
`Entrez ID` = col_double(),
`/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t00/ Tag Count in given bp (9384553.0 Total, normalization factor = 1, effective total = 10000000)` = col_double(),
`/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t15/ Tag Count in given bp (9252351.5 Total, normalization factor = 1, effective total = 10000000)` = col_double(),
`/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t60/ Tag Count in given bp (13741782.5 Total, normalization factor = 1, effective total = 10000000)` = col_double()
)
See spec(...) for full column specifications.
| PeakID (cmd=annotatePeaks.pl /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_merge/mergeTSS/tss_merge.txt hg38 -strand + -fragLength 1 -rlog -d /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t00/ /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t15/ /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t60/) | Chr | Start | End | Strand | Peak Score | Focus Ratio/Region Size | Annotation | Detailed Annotation | Distance to TSS | ... | Nearest Unigene | Nearest Refseq | Nearest Ensembl | Gene Name | Gene Alias | Gene Description | Gene Type | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t00/ Tag Count in given bp (9384553.0 Total, normalization factor = 1, effective total = 10000000) | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t15/ Tag Count in given bp (9252351.5 Total, normalization factor = 1, effective total = 10000000) | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t60/ Tag Count in given bp (13741782.5 Total, normalization factor = 1, effective total = 10000000) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Merged-chr14-49862550-3 | chr14 | 49862475 | 49862625 | + | 5036.000 | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap00/out.tss.txt|/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap15/out.tss.txt|/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap60/out.tss.txt | TTS (NR_027260) | TTS (NR_027260) | 299 | ... | NA | NR_027260 | ENSG00000274012 | RN7SL2 | 7L1C|7L30.1|7SL1c|RNSRP2 | RNA component of signal recognition particle 7SL2 | scRNA | 12.64921 | 12.18962 | 12.15269 |
| Merged-chr14-49862849-3 | chr14 | 49862774 | 49862924 | - | 4960.967 | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap00/out.tss.txt|/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap15/out.tss.txt|/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap60/out.tss.txt | promoter-TSS (NR_027260) | promoter-TSS (NR_027260) | 0 | ... | NA | NR_027260 | ENSG00000274012 | RN7SL2 | 7L1C|7L30.1|7SL1c|RNSRP2 | RNA component of signal recognition particle 7SL2 | scRNA | 12.20911 | 11.79050 | 12.05929 |
| Merged-chr17-19188010-3 | chr17 | 19187935 | 19188085 | + | 3358.767 | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap00/out.tss.txt|/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap15/out.tss.txt|/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap60/out.tss.txt | promoter-TSS (NR_006880) | promoter-TSS (NR_006880) | -6 | ... | NA | NR_006880 | ENSG00000263934 | SNORD3A | RNU3|U3 | small nucleolar RNA, C/D box 3A | snoRNA | 12.02521 | 11.93491 | 10.98238 |
2.10. Summarize annotation across the peaks
# Bar chart of how many peaks fall into each annotation category.
options(repr.plot.width=5, repr.plot.height=3)
dat = dat_rlg
txt = dat$Annotation
# Drop the " (accession)" suffix *including* the space before the parenthesis,
# so categories read "TTS" / "promoter-TSS" rather than "TTS " / "promoter-TSS "
# (same pattern as the one used when building the plotting table).
txt = str_remove(string = txt, pattern = " \\(.*")
qplot(txt) + theme_bw() + theme(axis.text.x = element_text(hjust=1, vjust=0.5, angle=90, size=10))
2.11. Visualize
# Build a compact per-peak table from dat_rlg: one "chr:start-end" locus
# string, strand, gene, annotation category, the three per-timepoint values
# (t00, t15, t60), their differences from t00, and a display label.
dat = dat_rlg
# Keep only the annotation category by dropping the " (accession)" suffix.
dat = dat %>% mutate(annotation = str_remove(string = Annotation, pattern = " \\(.*"))
dat = dat %>% dplyr::select(
Chr, Start, End, Strand, `Gene Name`, annotation,
starts_with("/gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer")) %>%
unite("grange", Start:End, sep = "-") %>%
unite("loc", Chr:grange, sep = ":")
# The renaming below is positional; fail loudly if the selection did not yield
# exactly loc, Strand, Gene Name, annotation plus three tag-count columns.
stopifnot(ncol(dat) == 7)
colnames(dat) = c("loc", "strand", "gene", "annotation", "t00", "t15", "t60")
# avg is the per-row average of the three timepoints. mean(t00 + t15 + t60)
# would instead reduce the whole column to a single scalar that is recycled
# onto every row.
dat = dat %>% mutate(d15 = t15 - t00, d60 = t60 - t00, avg = (t00 + t15 + t60) / 3)
dat = dat %>% mutate(label = paste0(gene, " (", annotation, ")"))
head(dat)
| loc | strand | gene | annotation | t00 | t15 | t60 | d15 | d60 | avg | label |
|---|---|---|---|---|---|---|---|---|---|---|
| chr14:49862475-49862625 | + | RN7SL2 | TTS | 12.64921 | 12.18962 | 12.15269 | -0.45958594 | -0.4965207 | 14.33395 | RN7SL2 (TTS) |
| chr14:49862774-49862924 | - | RN7SL2 | promoter-TSS | 12.20911 | 11.79050 | 12.05929 | -0.41860891 | -0.1498167 | 14.33395 | RN7SL2 (promoter-TSS) |
| chr17:19187935-19188085 | + | SNORD3A | promoter-TSS | 12.02521 | 11.93491 | 10.98238 | -0.09030435 | -1.0428297 | 14.33395 | SNORD3A (promoter-TSS) |
| chrM:5076-5226 | - | MIR12136 | Intergenic | 11.21550 | 11.52099 | 10.67755 | 0.30548606 | -0.5379560 | 14.33395 | MIR12136 (Intergenic) |
| chr14:49586803-49586954 | - | RN7SL1 | promoter-TSS | 12.08270 | 11.50629 | 11.49304 | -0.57641791 | -0.5896630 | 14.33395 | RN7SL1 (promoter-TSS) |
| chr14:49586499-49586649 | + | RN7SL1 | promoter-TSS | 11.99528 | 11.44538 | 11.71259 | -0.54990394 | -0.2826890 | 14.33395 | RN7SL1 (promoter-TSS) |
# Scatter of the change at 15 min vs. the change at 60 min (both relative to
# t00) for every peak, coloured by annotation category.
options(repr.plot.width = 5, repr.plot.height = 3)
dat %>%
    ggplot(aes(x = d15, y = d60, colour = annotation)) +
    geom_point(alpha = 0.5, size = 0.1) +
    labs(x = "t15 - t00", y = "t60 - t00") +
    theme_bw()
# Same scatter, restricted to peaks annotated as promoter-TSS.
# `tmp` is kept as a global because later cells query it.
options(repr.plot.width = 5, repr.plot.height = 3)
tmp = dplyr::filter(dat, annotation == "promoter-TSS")
tmp %>%
    ggplot(aes(x = d15, y = d60, colour = annotation)) +
    geom_point(alpha = 0.5, size = 0.1) +
    labs(x = "t15 - t00", y = "t60 - t00") +
    theme_bw()
2.11.1. Interactive plot
# Interactive version of the promoter-TSS scatter; hovering a point shows its
# "gene (annotation)" label only.
marker_style = list(size = 3, opacity = 0.5)
fig = plot_ly(
    data      = tmp,
    x         = ~d15,
    y         = ~d60,
    type      = "scatter",
    mode      = "markers",
    marker    = marker_style,
    hoverinfo = "text",
    text      = ~label,
    width     = 500,
    height    = 500
)
fig
Warning message:
“`arrange_()` is deprecated as of dplyr 0.7.0.
Please use `arrange()` instead.
See vignette('programming') for more help
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.”
# Genomic window of interest; edit these three values to inspect another region.
chrom = "chr5"
chromStart = 143275931
chromEnd = 143437512
dat = dat_raw
# Peaks lying strictly inside the window. The filter reads the variables above
# instead of repeating the literals, so the window is defined in one place.
# (The table's columns are Chr/Start/End, so the variable names do not collide.)
dat %>% dplyr::filter(Chr == chrom, Start > chromStart, End < chromEnd)
| PeakID (cmd=annotatePeaks.pl /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_merge/mergeTSS/tss_merge.txt hg38 -strand + -fragLength 1 -raw -d /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t00/ /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t15/ /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t60/) | Chr | Start | End | Strand | Peak Score | Focus Ratio/Region Size | Annotation | Detailed Annotation | Distance to TSS | ... | Nearest Unigene | Nearest Refseq | Nearest Ensembl | Gene Name | Gene Alias | Gene Description | Gene Type | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t00/ Tag Count in given bp (9384553.0 Total, normalization factor = 1, effective total = 10000000) | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t15/ Tag Count in given bp (9252351.5 Total, normalization factor = 1, effective total = 10000000) | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/tags/procap_t60/ Tag Count in given bp (13741782.5 Total, normalization factor = 1, effective total = 10000000) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Merged-chr5-143405356-1 | chr5 | 143405281 | 143405431 | + | 21.1 | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap60/out.tss.txt | promoter-TSS (NM_001364184) | promoter-TSS (NM_001364184) | -1 | ... | Hs.122926 | NM_000176 | ENSG00000113580 | NR3C1 | GCCR|GCR|GCRST|GR|GRL | nuclear receptor subfamily 3 group C member 1 | protein-coding | 1.5 | 1.5 | 23.5 |
| Merged-chr5-143405458-1 | chr5 | 143405383 | 143405533 | - | 19.3 | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap60/out.tss.txt | promoter-TSS (NM_001364184) | promoter-TSS (NM_001364184) | -103 | ... | Hs.122926 | NM_000176 | ENSG00000113580 | NR3C1 | GCCR|GCR|GCRST|GR|GRL | nuclear receptor subfamily 3 group C member 1 | protein-coding | 1.5 | 1.0 | 24.5 |
| Merged-chr5-143404994-1 | chr5 | 143404919 | 143405069 | + | 15.3 | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap60/out.tss.txt | promoter-TSS (NM_001364184) | promoter-TSS (NM_001364184) | 361 | ... | Hs.122926 | NM_000176 | ENSG00000113580 | NR3C1 | GCCR|GCR|GCRST|GR|GRL | nuclear receptor subfamily 3 group C member 1 | protein-coding | 0.0 | 0.0 | 21.0 |
| Merged-chr5-143405074-1 | chr5 | 143404999 | 143405149 | - | 14.6 | /gpfs/fs1/data/reddylab/Kuei/Dex_PROcap/run_homer/out_findTSS/tss_cap60/out.tss.txt | promoter-TSS (NM_001364184) | promoter-TSS (NM_001364184) | 281 | ... | Hs.122926 | NM_000176 | ENSG00000113580 | NR3C1 | GCCR|GCR|GCRST|GR|GRL | nuclear receptor subfamily 3 group C member 1 | protein-coding | 0.0 | 0.0 | 21.0 |
# First rows of promoter-TSS peaks that increase at both 15 and 60 min
# relative to t00, after dropping rows containing any NA.
head(na.omit(dplyr::filter(tmp, d15 > 0 & d60 > 0)))
| loc | strand | gene | annotation | t00 | t15 | t60 | d15 | d60 | avg | label |
|---|---|---|---|---|---|---|---|---|---|---|
| chr1:28505962-28506112 | + | RCC1 | promoter-TSS | 10.461414 | 10.92286 | 11.23594 | 0.46144687 | 0.7745303 | 14.33395 | RCC1 (promoter-TSS) |
| chrM:7730-7880 | + | MIR12136 | promoter-TSS | 10.709997 | 10.80312 | 11.02373 | 0.09312409 | 0.3137342 | 14.33395 | MIR12136 (promoter-TSS) |
| chrM:7831-7981 | - | MIR12136 | promoter-TSS | 10.361701 | 10.50908 | 10.78235 | 0.14737943 | 0.4206513 | 14.33395 | MIR12136 (promoter-TSS) |
| chr1:28505989-28506142 | - | RCC1 | promoter-TSS | 10.186032 | 10.64254 | 10.76907 | 0.45650418 | 0.5830342 | 14.33395 | RCC1 (promoter-TSS) |
| chr6:73520957-73521107 | - | EEF1A1 | promoter-TSS | 10.063260 | 10.27459 | 10.61019 | 0.21133336 | 0.5469263 | 14.33395 | EEF1A1 (promoter-TSS) |
| chr7:26200650-26200800 | + | HNRNPA2B1 | promoter-TSS | 9.857807 | 9.90849 | 10.35198 | 0.05068313 | 0.4941687 | 14.33395 | HNRNPA2B1 (promoter-TSS) |
# First rows of promoter-TSS peaks assigned to NR3C1, after dropping rows
# containing any NA.
head(na.omit(dplyr::filter(tmp, gene == "NR3C1")))
| loc | strand | gene | annotation | t00 | t15 | t60 | d15 | d60 | avg | label |
|---|---|---|---|---|---|---|---|---|---|---|
| chr5:143405281-143405431 | + | NR3C1 | promoter-TSS | 2.691103 | 2.690774 | 3.263305 | -3.294394e-04 | 0.5722016 | 14.33395 | NR3C1 (promoter-TSS) |
| chr5:143405383-143405533 | - | NR3C1 | promoter-TSS | 2.553483 | 2.496740 | 3.268192 | -5.674326e-02 | 0.7147090 | 14.33395 | NR3C1 (promoter-TSS) |
| chr5:143404919-143405069 | + | NR3C1 | promoter-TSS | 1.983771 | 1.983738 | 2.732771 | -3.291476e-05 | 0.7490001 | 14.33395 | NR3C1 (promoter-TSS) |
| chr5:143404999-143405149 | - | NR3C1 | promoter-TSS | 1.983771 | 1.983738 | 2.732771 | -3.291476e-05 | 0.7490001 | 14.33395 | NR3C1 (promoter-TSS) |